@@ -1,5 +1,6 @@ |
||
| 1 | 1 |
# Changes |
| 2 | 2 |
|
| 3 |
+* Jul 30, 2015 - RssAgent can configure the order of events created via `events_order`. |
|
| 3 | 4 |
* Jul 29, 2015 - WebsiteAgent can configure the order of events created via `events_order`. |
| 4 | 5 |
* Jul 29, 2015 - DataOutputAgent can configure the order of events in the output via `events_order`. |
| 5 | 6 |
* Jul 20, 2015 - Control Links (used by the SchedulerAgent) are correctly exported in Scenarios. |
@@ -9,6 +9,8 @@ module Agents |
||
| 9 | 9 |
can_dry_run! |
| 10 | 10 |
default_schedule "every_1d" |
| 11 | 11 |
|
| 12 |
+ DEFAULT_EVENTS_ORDER = [['{{date_published}}', 'time'], ['{{last_updated}}', 'time']]
|
|
| 13 |
+ |
|
| 12 | 14 |
description do |
| 13 | 15 |
<<-MD |
| 14 | 16 |
This Agent consumes RSS feeds and emits events when they change. |
@@ -29,6 +31,12 @@ module Agents |
||
| 29 | 31 |
* `disable_url_encoding` - Set to `true` to disable url encoding. |
| 30 | 32 |
* `user_agent` - A custom User-Agent name (default: "Faraday v#{Faraday::VERSION}").
|
| 31 | 33 |
* `max_events_per_run` - Limit number of events created (items parsed) per run for feed. |
| 34 |
+ |
|
| 35 |
+ # Ordering Events |
|
| 36 |
+ |
|
| 37 |
+ #{description_events_order}
|
|
| 38 |
+ |
|
| 39 |
+ In this Agent, the default value for `events_order` is `#{DEFAULT_EVENTS_ORDER.to_json}`.
|
|
| 32 | 40 |
MD |
| 33 | 41 |
end |
| 34 | 42 |
|
@@ -70,6 +78,11 @@ module Agents |
||
| 70 | 78 |
end |
| 71 | 79 |
|
| 72 | 80 |
validate_web_request_options! |
| 81 |
+ validate_events_order |
|
| 82 |
+ end |
|
| 83 |
+ |
|
| 84 |
+ def events_order |
|
| 85 |
+ super.presence || DEFAULT_EVENTS_ORDER |
|
| 73 | 86 |
end |
| 74 | 87 |
|
| 75 | 88 |
def check |
@@ -77,26 +90,15 @@ module Agents |
||
| 77 | 90 |
response = faraday.get(url) |
| 78 | 91 |
if response.success? |
| 79 | 92 |
feed = FeedNormalizer::FeedNormalizer.parse(response.body) |
| 80 |
- feed.clean! if interpolated['clean'] == 'true' |
|
| 93 |
+ feed.clean! if boolify(interpolated['clean']) |
|
| 81 | 94 |
max_events = (interpolated['max_events_per_run'].presence || 0).to_i |
| 82 | 95 |
created_event_count = 0 |
| 83 |
- feed.entries.sort_by { |entry| [entry.date_published, entry.last_updated] }.each.with_index do |entry, index|
|
|
| 96 |
+ sort_events(feed_to_events(feed)).each.with_index do |event, index| |
|
| 84 | 97 |
break if max_events && max_events > 0 && index >= max_events |
| 85 |
- entry_id = get_entry_id(entry) |
|
| 98 |
+ entry_id = event.payload[:id] |
|
| 86 | 99 |
if check_and_track(entry_id) |
| 87 | 100 |
created_event_count += 1 |
| 88 |
- create_event(payload: {
|
|
| 89 |
- id: entry_id, |
|
| 90 |
- date_published: entry.date_published, |
|
| 91 |
- last_updated: entry.last_updated, |
|
| 92 |
- url: entry.url, |
|
| 93 |
- urls: entry.urls, |
|
| 94 |
- description: entry.description, |
|
| 95 |
- content: entry.content, |
|
| 96 |
- title: entry.title, |
|
| 97 |
- authors: entry.authors, |
|
| 98 |
- categories: entry.categories |
|
| 99 |
- }) |
|
| 101 |
+ create_event(event) |
|
| 100 | 102 |
end |
| 101 | 103 |
end |
| 102 | 104 |
log "Fetched #{url} and created #{created_event_count} event(s)."
|
@@ -122,5 +124,22 @@ module Agents |
||
| 122 | 124 |
true |
| 123 | 125 |
end |
| 124 | 126 |
end |
| 127 |
+ |
|
| 128 |
+ def feed_to_events(feed) |
|
| 129 |
+ feed.entries.map { |entry|
|
|
| 130 |
+ Event.new(payload: {
|
|
| 131 |
+ id: get_entry_id(entry), |
|
| 132 |
+ date_published: entry.date_published, |
|
| 133 |
+ last_updated: entry.last_updated, |
|
| 134 |
+ url: entry.url, |
|
| 135 |
+ urls: entry.urls, |
|
| 136 |
+ description: entry.description, |
|
| 137 |
+ content: entry.content, |
|
| 138 |
+ title: entry.title, |
|
| 139 |
+ authors: entry.authors, |
|
| 140 |
+ categories: entry.categories |
|
| 141 |
+ }) |
|
| 142 |
+ } |
|
| 143 |
+ end |
|
| 125 | 144 |
end |
| 126 | 145 |
end |
@@ -66,6 +66,21 @@ describe Agents::RssAgent do |
||
| 66 | 66 |
expect(last.payload['urls']).to eq(["https://github.com/cantino/huginn/commit/d465158f77dcd9078697e6167b50abbfdfa8b1af"]) |
| 67 | 67 |
end |
| 68 | 68 |
|
| 69 |
+ it "should emit items as events in the order specified in the events_order option" do |
|
| 70 |
+ expect {
|
|
| 71 |
+ agent.options['events_order'] = ['{{title | replace_regex: "^[[:space:]]+", "" }}']
|
|
| 72 |
+ agent.check |
|
| 73 |
+ }.to change { agent.events.count }.by(20)
|
|
| 74 |
+ |
|
| 75 |
+ first, *, last = agent.events.last(20) |
|
| 76 |
+ expect(first.payload['title'].strip).to eq('upgrade rails and gems')
|
|
| 77 |
+ expect(first.payload['url']).to eq("https://github.com/cantino/huginn/commit/87a7abda23a82305d7050ac0bb400ce36c863d01")
|
|
| 78 |
+ expect(first.payload['urls']).to eq(["https://github.com/cantino/huginn/commit/87a7abda23a82305d7050ac0bb400ce36c863d01"]) |
|
| 79 |
+ expect(last.payload['title'].strip).to eq('Dashed line in a diagram indicates propagate_immediately being false.')
|
|
| 80 |
+ expect(last.payload['url']).to eq("https://github.com/cantino/huginn/commit/0e80f5341587aace2c023b06eb9265b776ac4535")
|
|
| 81 |
+ expect(last.payload['urls']).to eq(["https://github.com/cantino/huginn/commit/0e80f5341587aace2c023b06eb9265b776ac4535"]) |
|
| 82 |
+ end |
|
| 83 |
+ |
|
| 69 | 84 |
it "should track ids and not re-emit the same item when seen again" do |
| 70 | 85 |
agent.check |
| 71 | 86 |
expect(agent.memory['seen_ids']).to eq(agent.events.map {|e| e.payload['id'] })
|